#### Combine municipality-level results in a non-Bayesian way

### Prepare environment

setwd("~/research/coffee-dataverse/src")

do.origspec <- T # For determining priors
source("load.R", encoding="iso-8859-1")

### Load the muni-level data
output.suffix <- "-eq24-orig"

allfits <- read.csv(paste0("../data/allfits", output.suffix, ".csv"))
sumstats <- read.csv(paste0("../data/sumstats", output.suffix, ".csv"))

source("load-single.R")

load(paste0("../data/combined-betagamma", output.suffix, ".RData")) # la
library(rstan)

library(reshape2)
taus <- la$tau
colnames(taus) <- names(mcmc)[-grep("_fe", names(mcmc))]
taudf <- melt(taus)
names(taudf)[2] <- "param"

## Version 1: Try just normal pooling
## Use Bayesian hierarchical math, but find a tau such that pooled uncertainty equals average uncertainty...
pdf <- data.frame()
for (param in unique(taudf$param)) {
    print(param)
    maxtau <- max(taudf$value[taudf$param == param])
    rows <- sumstats$param == param & !is.na(sumstats$mean) & is.finite(sumstats$sd)
    for (tau in seq(0, 2*maxtau, length.out=100)) {
        ## hyper.mu <- sum(sumstats$neff[rows] * sumstats$mean[rows] / ((sumstats$Rhat[rows] * sumstats$sd[rows])^2 + tau^2)) / sum(sumstats$neff[rows] / ((sumstats$Rhat[rows] * sumstats$sd[rows])^2 + tau^2))
        ## hyper.sd <- sqrt(1 / sum(1 / ((sumstats$Rhat[rows] * sumstats$sd[rows])^2 + tau^2)))

        hyper.mu <- sum(sumstats$mean[rows] / ((sumstats$sd[rows])^2 + tau^2)) / sum(1 / ((sumstats$sd[rows])^2 + tau^2))
        hyper.sd <- sqrt(1 / sum(1 / ((sumstats$sd[rows])^2 + tau^2)))

        pdf <- rbind(pdf, data.frame(param, tau, hyper.mu, hyper.sd))
    }
}

library(ggplot2)

pdf$param <- factor(pdf$param, levels=c('yield_coeff1', 'yield_coeff2', 'yield_coeff3', 'yield_coeff4', 'yield_coeff5', 'death_coeff1', 'autoreg', 'pricememory', 'cost_harvest', 'cost_to_planting', 'scale_trend', 'yield_uncertainty', 'bearing_sigma', 'harvest_sigma', 'production_sigma'))

ggplot(pdf, aes(tau, hyper.mu)) +
    facet_wrap(~ param, scales="free", labeller=as_labeller(c('death_coeff1'="Die-off EDD effect", 'autoreg'="Planting autoreg.", 'pricememory'="Nerlove autoreg.", 'cost_harvest'="Cost of harvest", 'cost_to_planting'="Planting price resp.", 'scale_trend'="Exogenous trend", 'yield_coeff1'="Average Min.", 'yield_coeff2'="GDDs / 1000", 'yield_coeff3'="EDDs / 1000", 'yield_coeff4'="Precip. (m)", 'yield_coeff5'="Precip.�", 'death_coeff1'="Die-off EDD effect", 'yield_uncertainty'="Range of yields", 'bearing_sigma'="Bearing std. dev.", 'harvest_sigma'="Harvest std. dev.", 'production_sigma'="Production std. dev."))) +
    geom_line() + geom_ribbon(aes(ymin=hyper.mu - 1.96*hyper.sd, ymax=hyper.mu + 1.96*hyper.sd), alpha=.5) +
    geom_hline(yintercept=0, alpha=.5) +
    geom_vline(data=taudf %>% group_by(param) %>% summarize(x=mean(value)), aes(xintercept=x)) +
    scale_x_continuous(expand=c(0, 0)) + ylab("Parameter value") + ylab("Tau pooling parameter")
ggsave("../figures/alts//taupooling.pdf", width=9, height=7)


## Match up exactly with full analysis
load(paste0("../data/allvals", output.suffix, ".RData")) # allmus, allses

pdf <- data.frame()
for (param in unique(taudf$param)) {
    print(param)
    maxtau <- max(taudf$value[taudf$param == param])
    col <- which(names(mcmc) == param)
    rows <- !is.na(allmus[, col]) & is.finite(allses[, col])
    for (tau in seq(0, 2*maxtau, length.out=100)) {
        hyper.mu <- sum(allmus[rows, col] / ((allses[rows, col])^2 + tau^2)) / sum(1 / ((allses[rows, col])^2 + tau^2))
        hyper.sd <- sqrt(1 / sum(1 / ((allses[rows, col])^2 + tau^2)))

        pdf <- rbind(pdf, data.frame(param, tau, hyper.mu, hyper.sd))
    }
}

pdf$param <- factor(pdf$param, levels=c('yield_coeff1', 'yield_coeff2', 'yield_coeff3', 'yield_coeff4', 'yield_coeff5', 'death_coeff1', 'autoreg', 'pricememory', 'cost_harvest', 'cost_to_planting', 'scale_trend', 'yield_uncertainty', 'bearing_sigma', 'harvest_sigma', 'production_sigma'))

mylabels <- c('death_coeff1'="Die-off EDD effect", 'autoreg'="Planting autoreg.", 'pricememory'="Nerlove autoreg.", 'cost_harvest'="Cost of harvest", 'cost_to_planting'="Planting price resp.", 'scale_trend'="Exogenous trend", 'yield_coeff1'="Average Min.", 'yield_coeff2'="GDDs / 1000", 'yield_coeff3'="EDDs / 1000", 'yield_coeff4'="Precip. (m)", 'yield_coeff5'="Precip.�", 'death_coeff1'="Die-off EDD effect", 'yield_uncertainty'="Range of yields", 'bearing_sigma'="Bearing std. dev.", 'harvest_sigma'="Harvest std. dev.", 'production_sigma'="Production std. dev.")

ggplot(pdf, aes(tau, hyper.mu)) +
    facet_wrap(~ param, scales="free", labeller=as_labeller(mylabels)) +
    geom_line() + geom_ribbon(aes(ymin=hyper.mu - 1.96*hyper.sd, ymax=hyper.mu + 1.96*hyper.sd), alpha=.5) +
    geom_hline(yintercept=0, alpha=.5) +
    geom_vline(data=taudf %>% group_by(param) %>% summarize(x=mean(value)), aes(xintercept=x)) +
    scale_x_continuous(expand=c(0, 0)) + ylab("Parameter value") + ylab("Tau pooling parameter")
ggsave("../figures/alts//taupooling.pdf", width=9, height=7)

## Make a table
comboparams <- names(mcmc)[-grep("_fe", names(mcmc))]

tbl <- data.frame()
for (param in unique(taudf$param)) {
    col <- which(names(mcmc) == param)
    rows <- is.finite(allmus[, col]) & is.finite(allses[, col])
    pooled <- sum(allmus[rows, col] / ((allses[rows, col])^2)) / sum(1 / ((allses[rows, col])^2))
    unpooled <- mean(allmus[rows, col])
    tau <- mean(taudf$value[taudf$param == param])
    partial0 <- sum(allmus[rows, col] / ((allses[rows, col])^2 + tau^2)) / sum(1 / ((allses[rows, col])^2 + tau^2))
    partial <- mean(la$gamma[, 1, which(comboparams == param)])

    if (param == 'pricememory') {
        pooled <- 1 - pooled
        unpooled <- 1 - unpooled
        partial0 <- 1 - partial0
        partial <- 1 - partial
    }

    tbl <- rbind(tbl, data.frame(param, pooled, partial0, partial, unpooled))
}

tbl2 <- tbl %>% left_join(taudf %>% group_by(param) %>% summarize(tau=mean(value)))

library(xtable)
tbl3 <- tbl2
tbl3$param <- sapply(tbl3$param, function(x) mylabels[x])
print(xtable(tbl3), include.rownames=F)

ggplot(pdf, aes(tau, hyper.mu)) +
    facet_wrap(~ param, scales="free", labeller=as_labeller(mylabels)) +
    geom_line() + geom_ribbon(aes(ymin=hyper.mu - 1.96*hyper.sd, ymax=hyper.mu + 1.96*hyper.sd), alpha=.5) +
    geom_hline(yintercept=0, alpha=.5) +
    geom_vline(data=tbl2, aes(xintercept=tau)) +
    geom_point(data=tbl2, aes(tau, partial), shape=1) +
    scale_x_continuous(expand=c(0, 0)) + ylab("Parameter value") + ylab("Tau pooling parameter") +
    theme_bw()
ggsave("../figures/alts//taupooling.pdf", width=9, height=7)
